/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggrepel)
library(BayesPrism)
## Loading required package: snowfall
## Loading required package: snow
## Loading required package: NMF
## Loading required package: pkgmaker
## Loading required package: registry
## Loading required package: rngtools
## Loading required package: cluster
## NMF - BioConductor layer [OK] | Shared memory capabilities [NO: bigmemory] | Cores 63/64
## To enable shared memory capabilities, try: install.extras('
## NMF
## ')
## Warning: replacing previous import 'gplots::lowess' by 'stats::lowess' when
## loading 'BayesPrism'
## Warning: replacing previous import 'BiocParallel::register' by 'NMF::register'
## when loading 'BayesPrism'
dir.create(params$out_path)
## Warning in dir.create(params$out_path):
## '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/figures/ssGSEA'
## already exists
plot_geneset = function(id, group_pal = c('WT' = 'blue', 'EZHIP' = 'red')){
id_plot = str_replace_all(id, ' ', '_')
id_plot = str_replace_all(id_plot, '[.]', '_')
id_plot = str_replace_all(id_plot, '[/]', '_')
p = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
filter(ID == id) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution')),
Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, value, color = Group)) +
geom_point(size = 5, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank()) +
facet_wrap(.~name, scales = 'free_y') +
ggtitle(id)
p %>% print
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.', id_plot, '.dotplot.pdf'))
p %>% print
dev.off()
genes = (deg %>%
filter(ENS %in% unlist(atlas[['hg_ens']][id])) %>%
filter(padj < 0.05 & log2FoldChange > 0))$ENS
p = counts %>%
filter(ENS %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
left_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, value, color = Group)) +
geom_point(size = 2, alpha = 0.8) +
facet_wrap(.~SYM, scales = 'free_y') +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 10),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none',
strip.background = element_blank()) +
scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
ylab('Norm Counts') +
ggtitle(id)
p %>% print
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.', id_plot, '.genes_dotplot.pdf'))
p %>% print
dev.off()
mat = counts %>%
filter(ENS %in% genes) %>%
select(-ENS) %>%
column_to_rownames('SYM') %>%
t()
mat = t(scale(mat))
col_fun = circlize::colorRamp2(c(range(mat)[1], 0, range(mat)[2]), c("#235789", "#F2EFC7", "#BC412B"))
mat = mat[,meta$Nickname, drop=F]
ha = columnAnnotation(cond = meta$Group, col = list(cond = group_pal))
h = Heatmap(mat,
name = 'RNA',
bottom_annotation = ha,
row_title = str_replace(params$atlas_prefix, '_', ' '),
col = col_fun,
width = ncol(mat)*unit(4, "mm"),
height = nrow(mat)*unit(4, "mm"),
rect_gp = gpar(color = 'black'),
show_row_dend = F)
draw(h, heatmap_legend_side = "left", annotation_legend_side = 'left')
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.', id_plot, '.heatmap.pdf'), height = 10)
draw(h, heatmap_legend_side = "left", annotation_legend_side = 'left')
dev.off()
}
plot_geneset_clin = function(id, group_pal = c('EZHIP_neg' = 'blue', 'H3K27me3_Low' = 'deepskyblue', 'EZHIP_pos' = 'red')){
id_plot = str_replace_all(id, ' ', '_')
id_plot = str_replace_all(id_plot, '[.]', '_')
id_plot = str_replace_all(id_plot, '[/]', '_')
p = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
filter(ID == id) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution')),
Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, value, color = Group)) +
geom_point(size = 5, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank()) +
facet_wrap(.~name, scales = 'free_y') +
ggtitle(id)
p %>% print
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.', id_plot, '.dotplot.pdf'))
p %>% print
dev.off()
genes = (deg %>%
filter(ENS %in% unlist(atlas[['hg_ens']][id])) %>%
filter(log2FoldChange > 0.5))$ENS
p = counts %>%
filter(ENS %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
left_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, value, color = Group)) +
geom_point(size = 2, alpha = 0.8) +
facet_wrap(.~SYM, scales = 'free_y') +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 10),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none',
strip.background = element_blank()) +
scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
ylab('Norm Counts') +
ggtitle(id)
p %>% print
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.', id_plot, '.genes_dotplot.pdf'))
p %>% print()
dev.off()
mat = counts %>%
filter(ENS %in% genes) %>%
select(-ENS) %>%
column_to_rownames('SYM') %>%
t()
mat = t(scale(mat))
col_fun = circlize::colorRamp2(c(range(mat)[1], 0, range(mat)[2]), c("#235789", "#F2EFC7", "#BC412B"))
mat = mat[,meta$Nickname, drop=F]
colnames(mat) == meta$Nickname
ha = columnAnnotation(cond = meta$Group, col = list(cond = group_pal))
h = Heatmap(mat,
name = 'RNA',
bottom_annotation = ha,
row_title = str_replace(params$atlas_prefix, '_', ' '),
col = col_fun,
width = ncol(mat)*unit(4, "mm"),
height = nrow(mat)*unit(4, "mm"),
rect_gp = gpar(color = 'black'),
show_row_dend = F)
draw(h, heatmap_legend_side = "left", annotation_legend_side = 'left')
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.', id_plot, '.heatmap.pdf'), height = 10)
draw(h, heatmap_legend_side = "left", annotation_legend_side = 'left')
dev.off()
}
sample_prefix = 'hMSC'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/01_raptor_exprtools/EZHIP/"
group_pal = c('blue', 'red')
names(group_pal) = data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/Ensembl.ensGene.exon.norm.tsv.gz")): Detected 6 column names but the
## data has 7 columns (i.e. invalid file). Added 1 extra default column name for
## the first column which is guessed to be row names or an index. Use setnames()
## afterwards if this guess is not correct, or fix the file write command that
## created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/Ensembl.ensGene.exon/WTvsEZHIP.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/04_ssGSEA/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/03_deconvolution/Skeletal_Development_Baccin_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Smooth muscle"
plot_geneset(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/04_ssGSEA/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/03_deconvolution/Skeletal_Development_Baryawno_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "Pericytes"
plot_geneset(id)
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Removed 6 rows containing missing values (`geom_point()`).
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/04_ssGSEA/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/hMSC/out/03_deconvolution/Skeletal_Muscle_Tissue_hg/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "ACTA2+ MYH11+ MYL9+ Smooth muscle cells"
plot_geneset(id)
## png
## 2
sample_prefix = 'MG63_cell'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/01_raptor_exprtools/EZHIP/"
group_pal = c('blue', 'red')
names(group_pal) = data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/Ensembl.ensGene.exon.norm.tsv.gz")): Detected 6 column names but the
## data has 7 columns (i.e. invalid file). Added 1 extra default column name for
## the first column which is guessed to be row names or an index. Use setnames()
## afterwards if this guess is not correct, or fix the file write command that
## created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/Ensembl.ensGene.exon/WTvsEZHIP.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/04_ssGSEA/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/03_deconvolution/Skeletal_Development_Baccin_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Smooth muscle"
plot_geneset(id)
## png
## 2
id = "Osteo-CAR"
plot_geneset(id)
## png
## 2
id = "Adipo-CAR"
plot_geneset(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/04_ssGSEA/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/03_deconvolution/Skeletal_Development_Baryawno_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "Pericytes"
plot_geneset(id)
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Removed 6 rows containing missing values (`geom_point()`).
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/04_ssGSEA/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/MG63/out/03_deconvolution/Skeletal_Muscle_Tissue_hg/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "ACTA2+ MYH11+ MYL9+ Smooth muscle cells"
plot_geneset(id)
## png
## 2
sample_prefix = 'KHOS_cell'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/01_raptor_exprtools/EZHIP/"
group_pal = c('blue', 'red')
names(group_pal) = data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/Ensembl.ensGene.exon.norm.tsv.gz")): Detected 6 column names but the
## data has 7 columns (i.e. invalid file). Added 1 extra default column name for
## the first column which is guessed to be row names or an index. Use setnames()
## afterwards if this guess is not correct, or fix the file write command that
## created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/Ensembl.ensGene.exon/WTvsEZHIP.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/04_ssGSEA/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/03_deconvolution/Skeletal_Development_Baccin_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Osteo-CAR"
plot_geneset(id)
## png
## 2
id = "Adipo-CAR"
plot_geneset(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/04_ssGSEA/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/03_deconvolution/Skeletal_Development_Baryawno_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "Early_Osteoprogenitors"
plot_geneset(id)
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/04_ssGSEA/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/KHOS/out/03_deconvolution/Skeletal_Muscle_Tissue_hg/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "PAX7+ DLK1+ MuSCs and progenitors"
plot_geneset(id)
## png
## 2
sample_prefix = 'U2OS'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/01_raptor_exprtools/EZHIP_minus_C77_C2/"
group_pal = c('blue', 'red')
names(group_pal) = data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/Ensembl.ensGene.exon.norm.tsv.gz")): Detected 6 column names but the
## data has 7 columns (i.e. invalid file). Added 1 extra default column name for
## the first column which is guessed to be row names or an index. Use setnames()
## afterwards if this guess is not correct, or fix the file write command that
## created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/Ensembl.ensGene.exon/EZHIP_KOvsEZHIP.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/04_ssGSEA/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/03_deconvolution/Skeletal_Development_Baccin_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Smooth muscle"
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## png
## 2
id = "Osteo-CAR"
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## png
## 2
id = "Adipo-CAR"
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## png
## 2
id = "Ng2+ MSCs"
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/04_ssGSEA/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/03_deconvolution/Skeletal_Development_Baryawno_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "OLC_1"
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Removed 6 rows containing missing values (`geom_point()`).
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/04_ssGSEA/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/U2OS/out/03_deconvolution/Skeletal_Muscle_Tissue_hg/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "PAX7low MYF5+ MuSCs and progenitors"
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## png
## 2
id = 'ACTA1+ Mature skeletal muscle'
plot_geneset(id, group_pal = c('EZHIP_KO' = 'blue', 'EZHIP' = 'red'))
## png
## 2
sample_prefix = 'MG63_Xgraft'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/01_raptor_exprtools/MG63/"
group_pal = c('blue', 'red')
names(group_pal) = data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/hg19.Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/hg19.Ensembl.ensGene.exon.norm.tsv.gz")): Detected 5 column names but
## the data has 6 columns (i.e. invalid file). Added 1 extra default column name
## for the first column which is guessed to be row names or an index. Use
## setnames() afterwards if this guess is not correct, or fix the file write
## command that created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/hg19.Ensembl.ensGene.exon/WTvsEZHIP.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/04_ssGSEA/MG63/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/05_deconvolution/Skeletal_Development_Baccin_mm/MG63/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Adipo-CAR"
plot_geneset(id)
## png
## 2
id = "Osteo-CAR"
plot_geneset(id)
## png
## 2
id = "Chondrocytes"
plot_geneset(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/04_ssGSEA/MG63/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/05_deconvolution/Skeletal_Development_Baryawno_mm/MG63/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 20 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 20 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "Early_Osteoprogenitors"
plot_geneset(id)
## png
## 2
id = "Osteoprogenitors"
plot_geneset(id)
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/04_ssGSEA/MG63/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/05_deconvolution/Skeletal_Muscle_Tissue_hg/MG63/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "PAX7+ DLK1+ MuSCs and progenitors"
plot_geneset(id)
## png
## 2
sample_prefix = 'KHOS_Xgraft'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/01_raptor_exprtools/KHOS/"
group_pal = c('blue', 'red')
names(group_pal) = data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/hg19.Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/hg19.Ensembl.ensGene.exon.norm.tsv.gz")): Detected 6 column names but
## the data has 7 columns (i.e. invalid file). Added 1 extra default column name
## for the first column which is guessed to be row names or an index. Use
## setnames() afterwards if this guess is not correct, or fix the file write
## command that created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/hg19.Ensembl.ensGene.exon/WTvsEZHIP.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/04_ssGSEA/KHOS/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/05_deconvolution/Skeletal_Development_Baccin_mm/KHOS/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Fibro/Chondro p."
plot_geneset(id)
## png
## 2
id = "Osteo-CAR"
plot_geneset(id)
## png
## 2
id = "Chondrocytes"
plot_geneset(id)
## png
## 2
id = "Adipo-CAR"
plot_geneset(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/04_ssGSEA/KHOS/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/05_deconvolution/Skeletal_Development_Baryawno_mm/KHOS/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat_ssgsea = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat_ssgsea) = 'p_val'
# adjust p-value
stat_ssgsea = stat_ssgsea %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat_ssgsea = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat_ssgsea, by = 'ID')
stat_ssgsea
tmp = theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'frac') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat_deconv = as.data.frame(t((rbind(by(tmp, tmp$ID, function(frac) { t.test(frac~Group, data=frac, paired=FALSE)$p.value })))))
names(stat_deconv) = 'p_val'
# adjust p-value
stat_deconv = stat_deconv %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat_deconv = theta %>%
as.data.frame() %>%
rownames_to_column('ID') %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'frac') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(frac)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat_deconv, by = 'ID')
stat_deconv
full_join(stat_ssgsea, stat_deconv, by = 'ID', suffix = c(".ssgsea", ".deconv")) %>%
ggplot(aes(log2(log2_FC.ssgsea+1), log2(log2_FC.deconv+1), label = ID)) +
geom_point() +
geom_text_repel()
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 7 rows containing missing values (`geom_point()`).
## Warning: Removed 7 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 24 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "OLC_1"
plot_geneset(id)
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Removed 6 rows containing missing values (`geom_point()`).
## png
## 2
id = "Chondro_progen"
plot_geneset(id)
## png
## 2
id = "Osteoprogenitors"
plot_geneset(id)
## png
## 2
id = "Early_Osteoprogenitors"
plot_geneset(id)
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/04_ssGSEA/KHOS/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/xgrafts/out/05_deconvolution/Skeletal_Muscle_Tissue_hg/KHOS/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
tmp = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname'))
# calculate p-value
stat = as.data.frame(t((rbind(by(tmp, tmp$ID, function(ssGSEA) { t.test(ssGSEA~Group, data=ssGSEA, paired=FALSE)$p.value })))))
names(stat) = 'p_val'
# adjust p-value
stat = stat %>%
rownames_to_column('ID') %>%
mutate(adj_p_val = p.adjust(p_val, method = 'fdr')) %>%
arrange(adj_p_val)
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC) %>%
full_join(stat, by = 'ID')
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "ACTA2+ MYH11+ MYL9+ Smooth muscle cells"
plot_geneset(id)
## png
## 2
sample_prefix = 'clinical'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/01_raptor_exprtools/EZHIP_neg_VsEZHIP_pos_no_reps/"
group_pal = c('blue', 'red', 'gold')
names(group_pal) = c(data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group, 'S12003')
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/Ensembl.ensGene.exon.norm.tsv.gz")): Detected 24 column names but the
## data has 25 columns (i.e. invalid file). Added 1 extra default column name for
## the first column which is guessed to be row names or an index. Use setnames()
## afterwards if this guess is not correct, or fix the file write command that
## created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/Ensembl.ensGene.exon/EZHIP_negvsEZHIP_pos.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
genes = c('CXorf67')
p = counts %>%
filter(SYM %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
left_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, value, color = Group, label = Patient_ID)) +
geom_point(size = 2, alpha = 0.8) +
geom_text_repel() +
facet_wrap(.~SYM, scales = 'free_y') +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 10),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
aspect.ratio = 1) +
scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
ylab('Norm Counts')
p
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/04_ssGSEA/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/05_deconvolution/Skeletal_Development_Baccin_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
mutate(ID = factor(ID, levels = order),
Group = ifelse(sample == 'S12003', 'S12003', Group)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Ng2+ MSCs"
plot_geneset_clin(id)
## png
## 2
id = "Osteo-CAR"
plot_geneset_clin(id)
## png
## 2
id = "Chondrocytes"
plot_geneset_clin(id)
## png
## 2
id = "Adipo-CAR"
plot_geneset_clin(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/04_ssGSEA/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/05_deconvolution/Skeletal_Development_Baryawno_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
# add log2 FC
stat_ssgsea = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat_ssgsea
# add log2 FC
stat_deconv = theta %>%
as.data.frame() %>%
rownames_to_column('ID') %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'frac') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(frac)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat_deconv
order = (stat_ssgsea %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
mutate(ID = factor(ID, levels = order),
Group = ifelse(sample == 'S12003', 'S12003', Group)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 96 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 96 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "Preosteoblasts"
plot_geneset_clin(id)
## png
## 2
id = "Osteoprogenitors"
plot_geneset_clin(id)
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/04_ssGSEA/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/05_deconvolution/Skeletal_Muscle_Tissue_hg/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea)
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution')%>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
mutate(ID = factor(ID, levels = order),
Group = ifelse(sample == 'S12003', 'S12003', Group)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = c(group_pal, 'S12003' = 'gold')) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "PAX7+ DLK1+ MuSCs and progenitors"
plot_geneset_clin(id)
## png
## 2
sample_prefix = 'clinical_filt'
raptor_path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/01_raptor_exprtools/EZHIP_neg_VsEZHIP_pos_no_reps/"
group_pal = c('blue', 'red')
names(group_pal) = c(data.table::fread(paste0(raptor_path, "info.groups.tsv"))$Group)
meta = data.table::fread(paste0(raptor_path, "info.samples.tsv" ))
counts = data.table::fread(paste0(raptor_path, "/counts/Ensembl.ensGene.exon.norm.tsv.gz")) %>%
separate(V1, sep = ':', into = c('ENS', 'SYM'))
## Warning in data.table::fread(paste0(raptor_path,
## "/counts/Ensembl.ensGene.exon.norm.tsv.gz")): Detected 24 column names but the
## data has 25 columns (i.e. invalid file). Added 1 extra default column name for
## the first column which is guessed to be row names or an index. Use setnames()
## afterwards if this guess is not correct, or fix the file write command that
## created the file to create a valid file.
deg = data.table::fread(paste0(raptor_path, "/diff/Ensembl.ensGene.exon/EZHIP_negvsEZHIP_pos.tsv"))%>%
separate(ID, sep = ':', into = c('ENS', 'SYM'))
genes = c('CXorf67')
p = counts %>%
filter(SYM %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
left_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
arrange(desc(value)) %>%
mutate(rank = dense_rank((value)),
lab = ifelse(Group == 'EZHIP_pos',Patient_ID, NA)) %>%
ggplot(aes(rank, value, color = Group, label = lab)) +
geom_point(size = 2, alpha = 0.8) +
geom_text_repel() +
facet_wrap(.~SYM, scales = 'free_y') +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 10),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
aspect.ratio = 1) +
scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
ylab('Norm Counts') +
xlab('Rank')
p
## Warning: Removed 19 rows containing missing values (`geom_text_repel()`).
# Save top/low EZHIP IDs for later
samples = (counts %>%
filter(SYM %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
left_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
arrange(desc(value)) %>%
mutate(rank = dense_rank((value))) %>%
filter(Group == 'EZHIP_pos' & value > 10 | Group == 'EZHIP_neg' & value == 0))$name
meta = meta %>% filter(Nickname %in% samples)
counts = counts[, c('ENS', 'SYM', samples)]
genes = c('CXorf67')
p = counts %>%
filter(SYM %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
inner_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, log2(value+1), color = Group, label = Patient_ID)) +
geom_point(size = 2, alpha = 0.8) +
geom_text_repel() +
facet_wrap(.~SYM, scales = 'free_y') +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 10),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
aspect.ratio = 1) +
scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
ylab('Log2 Norm Counts')
p
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p = counts %>%
filter(SYM %in% genes) %>%
select(-ENS) %>%
pivot_longer(-SYM) %>%
inner_join(meta, by = c('name' = 'Nickname')) %>%
mutate(Group = factor(Group, levels = names(group_pal))) %>%
ggplot(aes(Group, value, color = Group, label = Patient_ID)) +
geom_point(size = 2, alpha = 0.8) +
geom_text_repel() +
facet_wrap(.~SYM, scales = 'free_y') +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 10),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
aspect.ratio = 1) +
scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
ylab('Norm Counts')
p
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/04_ssGSEA/baccin_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/05_deconvolution/Skeletal_Development_Baccin_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea) %>% as.data.frame()
ssgsea = ssgsea[,c('ID', samples)]
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta = theta[,samples]
theta
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
inner_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
inner_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
arrange(Group) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = "Ng2+ MSCs"
plot_geneset_clin(id)
## png
## 2
id = "Smooth muscle"
plot_geneset_clin(id)
## png
## 2
id = "Osteo-CAR"
plot_geneset_clin(id)
## png
## 2
id = "Chondrocytes"
plot_geneset_clin(id)
## png
## 2
id = "Adipo-CAR"
plot_geneset_clin(id)
## png
## 2
atlas_prefix = 'baryawno'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Development_Baryawno_mm/out/01_gene_sets/develop_bone_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/04_ssGSEA/baryawno_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/05_deconvolution/Skeletal_Development_Baryawno_mm/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea) %>% as.data.frame()
ssgsea = ssgsea[,c('ID', samples)]
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta = theta[,samples]
theta
# add log2 FC
stat_ssgsea = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat_ssgsea
# add log2 FC
stat_deconv = theta %>%
as.data.frame() %>%
rownames_to_column('ID') %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'frac') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(frac)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat_deconv
order = (stat_ssgsea %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
mutate(ID = factor(ID, levels = order)) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
arrange(Group) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
## Warning: Removed 44 rows containing missing values (`geom_point()`).
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
## Warning: Removed 44 rows containing missing values (`geom_point()`).
dev.off()
## png
## 2
id = "Preosteoblasts"
plot_geneset_clin(id)
## png
## 2
id = "Osteoprogenitors"
plot_geneset_clin(id)
## png
## 2
atlas_prefix = 'demicheli'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Skeletal_Muscle_Tissue_hg/out/02_gene_sets/skeletal_muscle_atlas_gene_sets.n100.Rda'
ssgsea = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/04_ssGSEA/demicheli_n100.ssgsea.txt'
deconv = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/05_deconvolution/Skeletal_Muscle_Tissue_hg/bp.res.Rda'
loadRData <- function(fileName){
#loads an RData file, and returns it
load(fileName)
get(ls()[ls() != "fileName"])
}
atlas = loadRData(atlas_path)
load(deconv)
ssgsea = data.table::fread(ssgsea) %>% as.data.frame()
ssgsea = ssgsea[, c('ID', samples)]
ssgsea
theta = get.fraction(bp=bp.res,
which.theta="final",
state.or.type="type") * 100
theta = theta %>% t %>% as.data.frame()
theta = theta[, samples]
theta
# add log2 FC
stat = ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')) %>%
group_by(ID, Group) %>%
mutate(mean = mean(ssGSEA)) %>%
ungroup() %>%
distinct(ID, mean, Group) %>%
pivot_wider(names_from = Group, values_from = mean) %>%
mutate(log2_FC = (.data[[names(group_pal)[2]]]) - (.data[[names(group_pal)[1]]])) %>%
arrange(desc(log2_FC)) %>%
select(ID, log2_FC)
stat
order = (stat %>% arrange(log2_FC))$ID
p1 = full_join(
ssgsea %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'ssGSEA') %>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
theta %>%
rownames_to_column('ID') %>%
as.data.frame() %>%
pivot_longer(-ID, names_to = 'sample', values_to = 'Deconvolution')%>%
left_join(meta %>% select(-ID), by = c('sample' = 'Nickname')),
by = c('sample', 'ID', 'Group')
) %>%
select(ID, sample, Group, ssGSEA, Deconvolution) %>%
mutate(ID = factor(ID, levels = order)) %>%
arrange(Group) %>%
pivot_longer(-c(ID, sample, Group)) %>%
mutate(name = factor(name, levels = c('ssGSEA', 'Deconvolution'))) %>%
ggplot(aes(ID, value, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
scale_color_manual(values = group_pal) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title = element_blank(),
legend.position = 'none',
strip.background = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey",
size = 0.3,
linetype = 2)) +
facet_wrap(.~name, scales = 'free_x') +
coord_flip()
p1
pdf(paste0(params$out_path, sample_prefix, '.', atlas_prefix, '.pdf'))
p1
dev.off()
## png
## 2
id = 'ACTA1+ Mature skeletal muscle'
plot_geneset_clin(id)
## png
## 2
id = "PAX7+ DLK1+ MuSCs and progenitors"
plot_geneset_clin(id)
## png
## 2
pattern = 'baccin_n100.ssgsea.txt'
group_pal = c('WT' = 'blue', 'EZHIP' = 'red')
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = 'baccin_n100.ssgsea.txt',
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'U2OS_cell'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "U2OS_cell"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:4){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = 'baccin_n100.ssgsea.txt',
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'U2OS_cell', 'hMSC_cell'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "hMSC_cell"
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "U2OS_cell"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = 'baccin_n100.ssgsea.txt',
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "U2OS_cell"
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = 'baccin_n100.ssgsea.txt',
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('MG63_cell', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "MG63_cell"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'KHOS_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "KHOS_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = 'baccin_n100.ssgsea.txt',
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "U2OS_cell"
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = 'baccin_n100.ssgsea.txt',
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
group_pal = c('EZHIP_neg' = 'blue', 'EZHIP_pos' = 'red')
clical_meta = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/01_raptor_exprtools/EZHIP_neg_VsEZHIP_pos_no_reps/info.samples.tsv'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('clinical'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "clinical"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.table::fread(clical_meta) %>%
mutate(Sample = paste0('clinical.', Nickname))
pal = c('EZHIP_pos' = 'red', 'EZHIP_neg' = 'blue', 'H3K27me3_Low' = 'deepskyblue')
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Nickname)) +
scale_color_manual(values = pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black", size = 2) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
meta = meta %>% filter(Nickname %in% samples)
df = df[,meta$Sample]
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Nickname)) +
scale_color_manual(values = pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black", size = 2) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
pattern = 'baryawno_n100.ssgsea.txt'
group_pal = c('WT' = 'blue', 'EZHIP' = 'red')
atlas_prefix = 'baccin'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'U2OS_cell'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "U2OS_cell"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:4){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'U2OS_cell', 'hMSC_cell'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "hMSC_cell"
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "U2OS_cell"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "U2OS_cell"
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'MG63_cell', 'KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "MG63_cell"
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('MG63_cell', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "MG63_cell"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
atlas_prefix = 'baccin'
atlas_path = '/lustre06/project/6004736/alvann/from_narval/REFERENCES/Bone_Marrow_Baccin/out/01_cluster_markers/bone_atlas_baccin_gene_sets.n100.Rda'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_cell', 'KHOS_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_cell"
## [1] "KHOS_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "U2OS_cell"
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('KHOS_xeno', 'MG63_xeno'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "KHOS_xeno"
## [1] "MG63_xeno"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.frame(Sample = names(df)) %>%
separate(Sample, sep = '[.]', into = c('Comp', 'Sample_Short'), remove = F) %>%
separate(Comp, sep = '_', into = c('Cell_Line', 'Type'), remove = F) %>%
mutate(Group = ifelse(grepl('EZHIP|C1', Sample), 'EZHIP', 'WT'))
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Comp)) +
scale_color_manual(values = group_pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black" ) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
group_pal = c('EZHIP_neg' = 'blue', 'EZHIP_pos' = 'red', 'H3K27me3_Low' = 'deepskyblue')
clical_meta = '/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/patient_clinical_seq/out/01_raptor_exprtools/EZHIP_neg_VsEZHIP_pos_no_reps/info.samples.tsv'
ssgsea_files = list.files(path = "/lustre06/project/6004736/alvann/from_narval/220318_EZHIP/RNA/",
pattern = pattern,
full.names = T,
recursive = T) %>%
data.frame(group = c('hMSC_cell', 'KHOS_cell', 'MG63_cell', 'clinical', 'U2OS_cell', 'KHOS_xeno', 'MG63_xeno'))
names(ssgsea_files)[1] = c('path')
ssgsea_files = filter(ssgsea_files, group %in% c('clinical'))
l = list()
for(g in ssgsea_files$group){
print(g)
l[[g]] = data.table::fread(ssgsea_files[ssgsea_files$group == g,]$path)
}
## [1] "clinical"
df = do.call(cbind, l)
names(df)[1] = 'tmp'
df = df %>%
select(!ends_with('ID')) %>%
column_to_rownames('tmp')
meta = data.table::fread(clical_meta) %>%
mutate(Sample = paste0('clinical.', Nickname))
pal = c('EZHIP_pos' = 'red', 'EZHIP_neg' = 'blue')
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Nickname)) +
scale_color_manual(values = pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black", size = 2) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Sex, label = Nickname)) +
scale_color_manual(values = c('green', 'orange')) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black", size = 2) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1) +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Age, label = Nickname)) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black", size = 2) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1) +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`
meta = meta %>% filter(Nickname %in% samples)
df = df[,meta$Sample]
pca = prcomp(df %>% t)
var = tibble(PC = factor(1:length(pca$sdev^2)),
variance = pca$sdev) %>%
mutate(pct = variance/sum(variance)*100)
for(i in 1:5){
pc.1 = paste0('PC', i)
pc.2 = paste0('PC', i+1)
print(pc.1)
print(pc.2)
pca_plot = pca$x %>%
as.data.frame() %>%
rownames_to_column('Sample') %>%
left_join(meta) %>%
ggplot(aes(x=.data[[pc.1]], y=.data[[pc.2]], color = Group, label = Nickname)) +
scale_color_manual(values = pal) +
geom_point(size = 4) +
ggrepel::geom_text_repel(show.legend = F, box.padding = 0.5, max.overlaps = Inf, color="black", size = 2) +
theme_bw() +
theme(text = element_text(size = 15),
axis.line = element_line(size = 0.5),
panel.border = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank(),
aspect.ratio = 1,
legend.position = 'none') +
xlab(paste0(pc.1, ' ', round(var[i,]$pct, digits = 1), '%')) +
ylab(paste0(pc.2, ' ', round(var[i+1,]$pct, digits = 1), '%'))
print(pca_plot)
}
## [1] "PC1"
## [1] "PC2"
## Joining with `by = join_by(Sample)`
## [1] "PC2"
## [1] "PC3"
## Joining with `by = join_by(Sample)`
## [1] "PC3"
## [1] "PC4"
## Joining with `by = join_by(Sample)`
## [1] "PC4"
## [1] "PC5"
## Joining with `by = join_by(Sample)`
## [1] "PC5"
## [1] "PC6"
## Joining with `by = join_by(Sample)`